-
Notifications
You must be signed in to change notification settings - Fork 13.3k
[SystemZ] Handle f16 load positive/negative/complement without libcalls. #136286
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-backend-systemz Author: Jonas Paulsson (JonPsson1) Changes: This can be done directly with the (64-bit) target instruction as only the sign bit is changed. Full diff: https://github.com/llvm/llvm-project/pull/136286.diff 10 Files Affected:
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 75cd5a319557d..746e2b1a88a17 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -554,6 +554,8 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
setOperationAction(ISD::IS_FPCLASS, MVT::f16, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f16, Legal);
+ for (auto Op : {ISD::FNEG, ISD::FABS})
+ setOperationAction(Op, MVT::f16, Legal);
}
for (unsigned I = MVT::FIRST_FP_VALUETYPE;
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/llvm/lib/Target/SystemZ/SystemZInstrFP.td
index 7775f456bbdc1..12e99f7e5f47a 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrFP.td
@@ -378,8 +378,10 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
}
// Generic form, which does not set CC.
def LPDFR : UnaryRRE<"lpdfr", 0xB370, fabs, FP64, FP64>;
-let isCodeGenOnly = 1 in
+let isCodeGenOnly = 1 in {
+ def LPDFR_16 : UnaryRRE<"lpdfr", 0xB370, fabs, FP16, FP16>;
def LPDFR_32 : UnaryRRE<"lpdfr", 0xB370, fabs, FP32, FP32>;
+}
// Negative absolute value (Load Negative).
let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
@@ -389,8 +391,10 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
}
// Generic form, which does not set CC.
def LNDFR : UnaryRRE<"lndfr", 0xB371, fnabs, FP64, FP64>;
-let isCodeGenOnly = 1 in
+let isCodeGenOnly = 1 in {
+ def LNDFR_16 : UnaryRRE<"lndfr", 0xB371, fnabs, FP16, FP16>;
def LNDFR_32 : UnaryRRE<"lndfr", 0xB371, fnabs, FP32, FP32>;
+}
// Square root.
let Uses = [FPC], mayRaiseFPException = 1 in {
diff --git a/llvm/test/CodeGen/SystemZ/fp-abs-01.ll b/llvm/test/CodeGen/SystemZ/fp-abs-01.ll
index 0cfdefe3bd61b..fe573f1e3587e 100644
--- a/llvm/test/CodeGen/SystemZ/fp-abs-01.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-abs-01.ll
@@ -7,10 +7,9 @@
declare half @llvm.fabs.f16(half %f)
define half @f0(half %f) {
; CHECK-LABEL: f0:
-; CHECK: brasl %r14, __extendhfsf2@PLT
+; CHECK: # %bb.0:
; CHECK-NEXT: lpdfr %f0, %f0
-; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT
-; CHECK: br %r14
+; CHECK-NEXT: br %r14
%res = call half @llvm.fabs.f16(half %f)
ret half %res
}
diff --git a/llvm/test/CodeGen/SystemZ/fp-abs-02.ll b/llvm/test/CodeGen/SystemZ/fp-abs-02.ll
index 4266a893e8a3b..752609ef6d00d 100644
--- a/llvm/test/CodeGen/SystemZ/fp-abs-02.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-abs-02.ll
@@ -3,6 +3,18 @@
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+; Test f16.
+declare half @llvm.fabs.f16(half %f)
+define half @f0(half %f) {
+; CHECK-LABEL: f0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lndfr %f0, %f0
+; CHECK-NEXT: br %r14
+ %abs = call half @llvm.fabs.f16(half %f)
+ %res = fneg half %abs
+ ret half %res
+}
+
; Test f32.
declare float @llvm.fabs.f32(float %f)
define float @f1(float %f) {
diff --git a/llvm/test/CodeGen/SystemZ/fp-abs-03.ll b/llvm/test/CodeGen/SystemZ/fp-abs-03.ll
index 29f2d06e75ff9..029ae2309cab9 100644
--- a/llvm/test/CodeGen/SystemZ/fp-abs-03.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-abs-03.ll
@@ -6,10 +6,9 @@
declare half @llvm.fabs.f16(half %f)
define half @f0(half %f) {
; CHECK-LABEL: f0:
-; CHECK: brasl %r14, __extendhfsf2@PLT
+; CHECK: # %bb.0:
; CHECK-NEXT: lpdfr %f0, %f0
-; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT
-; CHECK: br %r14
+; CHECK-NEXT: br %r14
%res = call half @llvm.fabs.f16(half %f)
ret half %res
}
diff --git a/llvm/test/CodeGen/SystemZ/fp-abs-04.ll b/llvm/test/CodeGen/SystemZ/fp-abs-04.ll
index afaf3f6d22ac8..fbb43b69371f3 100644
--- a/llvm/test/CodeGen/SystemZ/fp-abs-04.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-abs-04.ll
@@ -6,13 +6,9 @@
declare half @llvm.fabs.f16(half %f)
define half @f0(half %f) {
; CHECK-LABEL: f0:
-; CHECK: brasl %r14, __extendhfsf2@PLT
-; CHECK-NEXT: lpdfr %f0, %f0
-; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT
-; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT
-; CHECK-NEXT: lcdfr %f0, %f0
-; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT
-; CHECK: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: lndfr %f0, %f0
+; CHECK-NEXT: br %r14
%abs = call half @llvm.fabs.f16(half %f)
%res = fneg half %abs
ret half %res
diff --git a/llvm/test/CodeGen/SystemZ/fp-mul-08.ll b/llvm/test/CodeGen/SystemZ/fp-mul-08.ll
index 2b18abec8d555..e739bddd4f18f 100644
--- a/llvm/test/CodeGen/SystemZ/fp-mul-08.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-mul-08.ll
@@ -8,13 +8,12 @@ declare float @llvm.fma.f32(float %f1, float %f2, float %f3)
define half @f0(half %f1, half %f2, half %acc) {
; CHECK-LABEL: f0:
-; CHECK: brasl %r14, __extendhfsf2@PLT
-; CHECK: lcdfr %f0, %f0
-; CHECK: brasl %r14, __truncsfhf2@PLT
+; CHECK-NOT: brasl
+; CHECK: lcdfr %f{{[0-9]+}}, %f4
; CHECK: brasl %r14, __extendhfsf2@PLT
; CHECK: brasl %r14, __extendhfsf2@PLT
; CHECK: brasl %r14, __extendhfsf2@PLT
-; CHECK-SCALAR: maebr %f0, %f9, %f8
+; CHECK-SCALAR: maebr %f0, %f8, %f10
; CHECK-VECTOR: wfmasb %f0, %f0, %f8, %f10
; CHECK: brasl %r14, __truncsfhf2@PLT
; CHECK: br %r14
diff --git a/llvm/test/CodeGen/SystemZ/fp-mul-10.ll b/llvm/test/CodeGen/SystemZ/fp-mul-10.ll
index 1ecf52fbde354..8f2cd23112cd0 100644
--- a/llvm/test/CodeGen/SystemZ/fp-mul-10.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-mul-10.ll
@@ -30,10 +30,10 @@ define half @f3_half(half %f1, half %f2, half %acc) {
; CHECK: brasl %r14, __extendhfsf2@PLT
; CHECK: wfmasb %f0, %f0, %f8, %f10
; CHECK: brasl %r14, __truncsfhf2@PLT
-; CHECK: brasl %r14, __extendhfsf2@PLT
-; CHECK: lcdfr %f0, %f0
-; CHECK: brasl %r14, __truncsfhf2@PLT
-; CHECK: br %r14
+; CHECK-NOT: brasl
+; CHECK: lcdfr %f0, %f0
+; CHECK-NEXT: lmg
+; CHECK-NEXT: br %r14
%res = call half @llvm.fma.f16 (half %f1, half %f2, half %acc)
%negres = fneg half %res
ret half %negres
@@ -50,18 +50,17 @@ define float @f3(float %f1, float %f2, float %acc) {
define half @f4_half(half %f1, half %f2, half %acc) {
; CHECK-LABEL: f4_half:
-; CHECK: brasl %r14, __extendhfsf2@PLT
-; CHECK: lcdfr %f0, %f0
-; CHECK: brasl %r14, __truncsfhf2@PLT
+; CHECK-NOT: brasl
+; CHECK: lcdfr %f0, %f4
; CHECK: brasl %r14, __extendhfsf2@PLT
; CHECK: brasl %r14, __extendhfsf2@PLT
; CHECK: brasl %r14, __extendhfsf2@PLT
; CHECK: wfmasb %f0, %f0, %f8, %f10
; CHECK: brasl %r14, __truncsfhf2@PLT
-; CHECK: brasl %r14, __extendhfsf2@PLT
-; CHECK: lcdfr %f0, %f0
-; CHECK: brasl %r14, __truncsfhf2@PLT
-; CHECK: br %r14
+; CHECK-NOT: brasl
+; CHECK: lcdfr %f0, %f0
+; CHECK-NEXT: lmg
+; CHECK-NEXT: br %r14
%negacc = fneg half %acc
%res = call half @llvm.fma.f16 (half %f1, half %f2, half %negacc)
%negres = fneg half %res
diff --git a/llvm/test/CodeGen/SystemZ/fp-neg-01.ll b/llvm/test/CodeGen/SystemZ/fp-neg-01.ll
index a8fe8d5da7c8a..0e19d9647178f 100644
--- a/llvm/test/CodeGen/SystemZ/fp-neg-01.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-neg-01.ll
@@ -6,10 +6,9 @@
; Test f16.
define half @f0(half %f) {
; CHECK-LABEL: f0:
-; CHECK: brasl %r14, __extendhfsf2@PLT
-; CHECK: lcdfr %f0, %f0
-; CHECK: brasl %r14, __truncsfhf2@PLT
-; CHECK: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: lcdfr %f0, %f0
+; CHECK-NEXT: br %r14
%res = fneg half %f
ret half %res
}
diff --git a/llvm/test/CodeGen/SystemZ/fp-neg-02.ll b/llvm/test/CodeGen/SystemZ/fp-neg-02.ll
index 848c4740d8540..d0802878f8f8b 100644
--- a/llvm/test/CodeGen/SystemZ/fp-neg-02.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-neg-02.ll
@@ -5,10 +5,9 @@
; Test f16.
define half @f0(half %f) {
; CHECK-LABEL: f0:
-; CHECK: brasl %r14, __extendhfsf2@PLT
-; CHECK: lcdfr %f0, %f0
-; CHECK: brasl %r14, __truncsfhf2@PLT
-; CHECK: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: lcdfr %f0, %f0
+; CHECK-NEXT: br %r14
%res = fneg half %f
ret half %res
}
|
Was the ABI for f16 published anywhere publicly, by the way? I'm trying to understand how |
It's not yet published (we're planning on doing this soon), but you're indeed correct that they are passed in the same registers as f32 and f64, and aligned at the MSB. (That's why the same set of instructions already works for both f32 and f64.) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks!
@@ -554,6 +554,8 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, | |||
setOperationAction(ISD::BITCAST, MVT::i16, Custom); | |||
setOperationAction(ISD::IS_FPCLASS, MVT::f16, Custom); | |||
setOperationAction(ISD::FCOPYSIGN, MVT::f16, Legal); | |||
for (auto Op : {ISD::FNEG, ISD::FABS}) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Minor nit: maybe add FCOPYSIGN to the loop then?
This can be done directly with the (64-bit) target instruction as only the sign bit is changed.